In [1]:
import os
import numpy as np
import pandas as pd
import cv2
import matplotlib.pylab as plt
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import plotly.express as px
import matplotlib.pylab as plt
from IPython.display import display, clear_output
from ipywidgets import interact, widgets
In [2]:
# Dataset locations, given relative to the notebook's working directory.
images_folder = "CodingChallenge_v2/imgs"  # directory holding the image files
csv_path = "CodingChallenge_v2/car_imgs_4000.csv"  # table of filenames and scores
In [3]:
# Load the score table (one row per image filename) and preview its first rows.
df = pd.read_csv(filepath_or_buffer=csv_path)
df.head(n=5)
Out[3]:
| | filename | perspective_score_hood | perspective_score_backdoor_left |
|---|---|---|---|
| 0 | 66ee2d88-f403-4821-a370-8a3d72e200f9.jpg | 0.27352 | 0.439526 |
| 1 | bf7e237e-f12b-4ffc-8d79-6855cc35ea06.jpg | 0.00000 | 0.902682 |
| 2 | 4c297488-c81f-464d-92bf-1c408333a912.jpg | 0.00000 | 0.900864 |
| 3 | 6c95ad0b-fda6-42a8-a33d-b430073e1dcd.jpg | 0.00000 | 0.213162 |
| 4 | bd806d94-4c2e-4cd2-8e02-5eaba7c7c63e.jpg | 0.00000 | 0.818388 |
In [4]:
# Dataset dimensions as (number of rows, number of columns).
(len(df.index), len(df.columns))
Out[4]:
(4000, 3)
In [5]:
# Shape of the subset of rows whose hood score is strictly positive.
df.loc[df["perspective_score_hood"].gt(0)].shape
Out[5]:
(1897, 3)
In [6]:
# Mean of each score column, shown as a (hood, backdoor_left) pair.
(
    df["perspective_score_hood"].mean(),
    df["perspective_score_backdoor_left"].mean(),
)
Out[6]:
(0.30303198100175, 0.31336786319750004)
This does not look like a large imbalance, at least at first glance¶
In [7]:
# Side-by-side histograms of the two score columns: hood on the left,
# backdoor_left on the right.
fig = make_subplots(rows=1, cols=2)
# add_trace returns the figure itself, so the calls can be chained and the
# final expression is still the figure that the notebook renders.
(
    fig
    .add_trace(
        go.Histogram(x=df["perspective_score_hood"], name="perspective_score_hood"),
        row=1,
        col=1,
    )
    .add_trace(
        go.Histogram(
            x=df["perspective_score_backdoor_left"],
            name="perspective_score_backdoor_left",
        ),
        row=1,
        col=2,
    )
)
Even the scores have similar distributions, interesting¶
In [8]:
# Pull each column out as a plain Python list so the plotting callback can
# look values up by integer position.
images = df["filename"].tolist()
hood_scores = df["perspective_score_hood"].tolist()
backdoor_scores = df["perspective_score_backdoor_left"].tolist()
In [9]:
# Slider over image positions, from 0 to the last index of `images`.
slider = widgets.IntSlider(
    value=0,
    min=0,
    max=len(images)-1)


@interact(file=slider)
def plot_file(file):
    """Show the image at position ``file`` with both scores in the title.

    Args:
        file: Integer index into ``images``, ``hood_scores`` and
            ``backdoor_scores``; supplied by the slider.

    Raises:
        FileNotFoundError: If OpenCV cannot read the image at the built path.
    """
    img_path = f"{images_folder}/{images[file]}"
    img = cv2.imread(img_path)
    # cv2.imread reports a missing or undecodable file by returning None
    # instead of raising; fail here with the path rather than letting
    # cvtColor raise an opaque OpenCV assertion error.
    if img is None:
        raise FileNotFoundError(f"Could not read image (missing or unreadable): {img_path}")
    # OpenCV decodes to BGR channel order; convert to RGB before plotting.
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    plt.imshow(img)
    plt.title(f"hood score: {hood_scores[file]}, backdoor score: {backdoor_scores[file]}")
There are quite a lot of images that are indeed a mixture of classes; there are also pictures that contain none of the classes. This should also contribute to better generalization.¶
In [10]:
# Slider over image positions, from 0 to the last index of `images`.
# Starts at index 3709, an example hand-picked by the author
# (presumably illustrating the surrounding notes; confirm).
slider = widgets.IntSlider(
    value=3709,
    min=0,
    max=len(images)-1)


@interact(file=slider)
def plot_file(file):
    """Show the image at position ``file`` with both scores in the title.

    Args:
        file: Integer index into ``images``, ``hood_scores`` and
            ``backdoor_scores``; supplied by the slider.

    Raises:
        FileNotFoundError: If OpenCV cannot read the image at the built path.
    """
    img_path = f"{images_folder}/{images[file]}"
    img = cv2.imread(img_path)
    # cv2.imread reports a missing or undecodable file by returning None
    # instead of raising; fail here with the path rather than letting
    # cvtColor raise an opaque OpenCV assertion error.
    if img is None:
        raise FileNotFoundError(f"Could not read image (missing or unreadable): {img_path}")
    # OpenCV decodes to BGR channel order; convert to RGB before plotting.
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    plt.imshow(img)
    plt.title(f"hood score: {hood_scores[file]}, backdoor score: {backdoor_scores[file]}")
I have my doubts about some pictures, where I would say the scores are off. I will use them in the following training anyway, but I am marking this as a yellow flag that should be discussed. For example, the following picture seems to contain the left backdoor while both scores are 0. I would have no problem with that if the task clearly stated that a score greater than 0 is required only for objects that are fully inside the image.¶
In [11]:
# Slider over image positions, from 0 to the last index of `images`.
# Starts at index 3289, the example the preceding note describes as appearing
# to show a left backdoor while both scores are 0.
slider = widgets.IntSlider(
    value=3289,
    min=0,
    max=len(images)-1)


@interact(file=slider)
def plot_file(file):
    """Show the image at position ``file`` with both scores in the title.

    Args:
        file: Integer index into ``images``, ``hood_scores`` and
            ``backdoor_scores``; supplied by the slider.

    Raises:
        FileNotFoundError: If OpenCV cannot read the image at the built path.
    """
    img_path = f"{images_folder}/{images[file]}"
    img = cv2.imread(img_path)
    # cv2.imread reports a missing or undecodable file by returning None
    # instead of raising; fail here with the path rather than letting
    # cvtColor raise an opaque OpenCV assertion error.
    if img is None:
        raise FileNotFoundError(f"Could not read image (missing or unreadable): {img_path}")
    # OpenCV decodes to BGR channel order; convert to RGB before plotting.
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    plt.imshow(img)
    plt.title(f"hood score: {hood_scores[file]}, backdoor score: {backdoor_scores[file]}")
I will get down to the main business and come back to this if I have more time¶
In [ ]: